import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Exploratory statistics over a stock-listing spreadsheet.
#
# The path is a raw string so the Windows backslashes are taken literally;
# in a normal string "\p" and "\s" are invalid escape sequences, which Python
# warns about and will eventually reject.
# 'code' is read as str so leading zeros in the codes (e.g. '002522') survive.
df = pd.read_excel(r'D:\python work\py\stock.xlsx', dtype={'code': 'str'})
df.set_index('code', inplace=True)

# Show the row for a single stock code.
print(df.loc['002522'])

# Number of distinct values in the industry / area columns.
print('行业数：', len(df.industry.unique()))
print('地区数：', len(df.area.unique()))

# Number of listed companies per area, largest first.
print(df.groupby('area').size().sort_values(ascending=False))

# Group by the first four characters of timeToMarket to count listings per
# year (presumably timeToMarket is a YYYYMMDD integer -- TODO confirm).
year = df.timeToMarket.astype('str').str[:4]
yearnum = df.groupby(year).size()
print(yearnum)

# Plot yearly counts, skipping the '0' label (presumably a placeholder for
# a missing listing date -- verify against the data).
# NOTE(review): this prints the Axes object returned by .plot(); nothing in
# this script calls plt.show(), so a plain `python script.py` run may not
# display the figure.
print(yearnum[yearnum.index != '0'].plot(fontsize=14, title='年IPO数量'))

# Plain mean of the pe column, then the mean restricted to rows with pe > 0
# (i.e. excluding loss-making stocks).
print(df.pe.mean())
print(df[df.pe > 0].pe.mean())

# Total market value estimate: 4 * esp * pe * totals.
# NOTE(review): the factor 4 presumably annualizes a quarterly per-share
# earnings figure -- confirm against the data source.
df['tvalue'] = 4 * df.esp * df.pe * df.totals

# Market-value-weighted average of pe.
print(np.sum(df.pe * df.tvalue) / df.tvalue.sum())

# Use the first two characters of the stock code as a board label, then
# report the mean pe and the stock count for each board.
df['board'] = df.index.str[:2]
print(df.groupby('board').pe.agg([('pe均值', 'mean'), ('股票数', 'count')]))
